Read in the data

#normal data
ed <- fread(here::here("data/education/combined_education.csv")) %>% 
  mutate(year = factor(year))
ed$year <- revalue(ed$year,c("1415" = "2014-2015", "1516" = "2015-2016", "1617" = "2016-2017",
          "1718" = "2017-2018", "1819" = "2018-2019"))
ed <- ed %>% filter(District.Name == "South Wasco County SD 1" |
           District.Name == "Jefferson County SD 509J" |
           District.Name == "North Wasco County SD 21" |
           District.Name == "Sherman County SD" |
           District.Name == "Dufur SD 29" |
           District.Name == "Hood River County SD") %>% 
  select(-c("District.ID" ,"Student.Group","Fall.Membership", "Free.Reduced.Priced.Lunch","Percent.Regular.Attenders" )) 

ed.increase <- ed %>% select("year" ,"District.Name","On.Time.Grad.Rate", "Teacher.Experience.Pct","Percent.ELA.Proficient.Change")
ed.decrease <- ed %>% select("year" ,"District.Name","Percent.Economically.Disadvantaged", "Percent.Chronically.Absent","Dropout.Rate")

#scaling the data within each measure to standardize the values.
#con: suppresses the minor differences in variables that have a smaller range
ed.scaled <- ed %>% mutate(Percent.ELA.Proficient.Change = scale(Percent.ELA.Proficient.Change),
                                  Percent.Chronically.Absent = scale(Percent.Chronically.Absent),
                                  Percent.Economically.Disadvantaged = scale(Percent.Economically.Disadvantaged),
                                  Teacher.Experience.Pct = scale(Teacher.Experience.Pct),
                                  Dropout.Rate = scale(Dropout.Rate),
                                  On.Time.Grad.Rate = scale(On.Time.Grad.Rate))
ed.scaled <- ed.scaled %>% filter(District.Name == "South Wasco County SD 1" |
                      District.Name == "Jefferson County SD 509J" |
                      District.Name == "North Wasco County SD 21" |
                      District.Name == "Sherman County SD" |
                      District.Name == "Dufur SD 29" |
                      District.Name == "Hood River County SD")

prepare table to make a heatmap

## for all the education data with original values
ed.melt = melt(ed, id.vars = c("year", "District.Name"),
             measure.vars = c("On.Time.Grad.Rate", "Dropout.Rate" , 
                              "Percent.ELA.Proficient.Change", "Teacher.Experience.Pct", "Percent.Chronically.Absent",
                              "Percent.Economically.Disadvantaged"))

## for the education data with scaled values
ed.melt.scaled = melt(ed.scaled, id.vars = c("year", "District.Name"),
               measure.vars = c("On.Time.Grad.Rate", "Dropout.Rate" , 
                                "Percent.ELA.Proficient.Change", "Teacher.Experience.Pct", "Percent.Chronically.Absent",
                                "Percent.Economically.Disadvantaged")) 

### increaseing variables
ed.melt.increase = melt(ed.increase, id.vars = c("year", "District.Name"),
               measure.vars = c("On.Time.Grad.Rate", "Teacher.Experience.Pct",
                                "Percent.ELA.Proficient.Change")) %>%
  mutate(variable = factor(variable, levels = c("On.Time.Grad.Rate", "Teacher.Experience.Pct",
                                "Percent.ELA.Proficient.Change")),
         variable = recode(variable, "On.Time.Grad.Rate" = "On Time Graduation",
                            "Teacher.Experience.Pct" = "Teacher Experience",
                            "Percent.ELA.Proficient.Change" = "ELA Proficiency Change"))
ed.melt.decrease = melt(ed.decrease, id.vars = c("year", "District.Name"),
               measure.vars = c("Percent.Economically.Disadvantaged", "Percent.Chronically.Absent",
                                "Dropout.Rate")) %>%
  mutate(variable = recode(variable, "Percent.Economically.Disadvantaged" = "Economically Disadvantaged",
                           "Percent.Chronically.Absent" = "Chronic Absenteeism",
                           "Dropout.Rate"="Dropout Rate"))

try making divergent red, white and blue color palette

img <- function(obj, nam) {
  image(1:length(obj), 1, as.matrix(1:length(obj)), col=obj, 
        main = nam, ylab = "", xaxt = "n", yaxt = "n",  bty = "n")
}
rwb <- colorRampPalette(colors = c("red", "white", "blue"))
img(rwb(100), "red-white-blue")

colorRampPalette(c("red", "blue"))(25)
##  [1] "#FF0000" "#F4000A" "#E90015" "#DF001F" "#D4002A" "#C90035" "#BF003F"
##  [8] "#B4004A" "#AA0055" "#9F005F" "#94006A" "#8A0074" "#7F007F" "#74008A"
## [15] "#6A0094" "#5F009F" "#5500AA" "#4A00B4" "#3F00BF" "#3500C9" "#2A00D4"
## [22] "#1F00DF" "#1500E9" "#0A00F4" "#0000FF"

Heat Maps

Original Scale

for south wasco alone:

sw <- filter(ed.melt, District.Name == "South Wasco County SD 1")
ggplot(sw, aes(y = variable, x = factor(year), fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  scale_fill_viridis() +
  coord_equal()

Facet wrap to show all schools

ggplot(ed.melt, aes(y = variable, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + facet_wrap(~District.Name) +
  scale_fill_viridis(option = "D") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  xlab("School Year")
## Warning: Removed 12 rows containing missing values (geom_text).

Trying with rwb divergent palette: not working - Continuous value supplied to discrete scale.

continuous scale palettes: https://biostats.w.uib.no/color-scale-for-continuous-variables/ controlling midpoint and range for color palettes: https://stackoverflow.com/questions/58718527/setting-midpoint-for-continuous-diverging-color-scale-on-a-heatmap

# interval <- c(-60, -20, -10, -5, seq(0,100,5))
# data.values <- as.vector(na.omit(ed.melt$value))
# color_rwb <- cut(data.values, breaks=interval, labels = rwb(24))
pgcol <- brewer.pal(9, "PRGn")
pgpal <- colorRampPalette(pgcol)
ggplot(ed.melt, aes(y = variable, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + facet_wrap(~District.Name) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0)
## Warning: Removed 12 rows containing missing values (geom_text).

  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  xlab("School Year")
## List of 2
##  $ axis.text.x:List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 1
##   ..$ vjust        : num 0.5
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ x          : chr "School Year"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

for variables where increasing is good

ggplot(ed.melt.increase, aes(y = variable, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + facet_wrap(~District.Name) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Benefits to Student Success", x ="School Year", y = "", fill="Percent") 
## Warning: Removed 12 rows containing missing values (geom_text).

# and all in one instead of facet
ed.melt.increase2 <- ed.melt.increase %>% mutate(indicator = paste(variable, District.Name, sep= "-"))
ggplot(ed.melt.increase2, aes(y = indicator, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + 
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  
   ## banners
  geom_hline(yintercept = 6.5, color = "white", lwd = 5) + 
  geom_hline(yintercept = 12.5, color = "white", lwd = 5) +
  geom_hline(yintercept = 18.5, color = "white", lwd = 5) +

  # Title of variables
  annotate("text", x=2, y=6.5,label="ELA Proficiency Change", fontface=2, size = 3, color="black") +
  annotate("text", x=2, y=12.5,label="On Time Graduation", fontface=2, size = 3, color="black") +
  annotate("text", x=2, y=18.5,label="Teacher Experience", fontface=2, size = 3, color="black") +
  
  #scale_y_discrete(limit = rep(c("South Wasco County SD 1", "Sherman County SD" , "North Wasco County SD 21",
                             #"Jefferson County SD 509J", "Hood River County SD","Dufur SD 29"), 3)) +
  xlab("School Year")
## Warning: Removed 12 rows containing missing values (geom_text).

Variables where increaseing is meh

ggplotly(ggplot(ed.melt.decrease, aes(y = variable, x = year, fill = value,
                              text = paste0("School District: ", District.Name,
                                "<br>Year: ", year,
                                "<br>Percent ", variable, ": ", value, "%"))) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + facet_wrap(~District.Name) +
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_sequential(palette = "Purples 3") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(title = "Barriers to Student Success", 
       x ="School Year", y = "", fill="Percent"), 
  tooltip = "text")%>% 
  config(displayModeBar = "static", displaylogo = FALSE, 
         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d","hoverClosestCartesian",
                                     "hoverCompareCartesian","resetScale2d")) 
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# and all in one instead of facet
ed.melt.decrease2 <- ed.melt.decrease %>% mutate(indicator = paste(variable, District.Name, sep= "-"))
ggplot(ed.melt.decrease2, aes(y = indicator, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + 
  #scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
  scale_fill_continuous_sequential(palette = "Purples 3") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ## banners
  geom_hline(yintercept = 6.5, color = "white", lwd = 5) + 
  geom_hline(yintercept = 12.5, color = "white", lwd = 5) +
  geom_hline(yintercept = 18.5, color = "white", lwd = 5) +

  # Title of variables
  annotate("text", x=2, y=6.5,label="Chronic Absenteeism", fontface=2, size = 3, color="black") +
  annotate("text", x=2, y=12.5,label="Dropout", fontface=2, size = 3, color="black") +
  annotate("text", x=2, y=18.5,label="Economically Disadvantaged", fontface=2, size = 3, color="black") +
  
  xlab("School Year")

get everything absolutely on one plot (not faceted)

ed.melt2 <- ed.melt %>% mutate(indicator = paste(variable, District.Name, sep= "-"),
                               indicator = factor(indicator, 
                                                  levels=c("On.Time.Grad.Rate-South Wasco County SD 1",
                                                           "On.Time.Grad.Rate-North Wasco County SD 21",
                                                           "On.Time.Grad.Rate-Dufur SD 29",
                                                           "On.Time.Grad.Rate-Hood River County SD" ,
                                                           "On.Time.Grad.Rate-Sherman County SD",
                                                           "On.Time.Grad.Rate-Jefferson County SD 509J",
                                                           
                                                           "Dropout.Rate-South Wasco County SD 1",
                                                           "Dropout.Rate-North Wasco County SD 21",
                                                           "Dropout.Rate-Dufur SD 29",
                                                           "Dropout.Rate-Hood River County SD" ,
                                                           "Dropout.Rate-Sherman County SD",
                                                           "Dropout.Rate-Jefferson County SD 509J",
                                                           
                                                           "Percent.ELA.Proficient.Change-South Wasco County SD 1",
                                                           "Percent.ELA.Proficient.Change-North Wasco County SD 21",
                                                           "Percent.ELA.Proficient.Change-Dufur SD 29",
                                                           "Percent.ELA.Proficient.Change-Hood River County SD" ,
                                                           "Percent.ELA.Proficient.Change-Sherman County SD",
                                                           "Percent.ELA.Proficient.Change-Jefferson County SD 509J",
                                                           
                                                           "Teacher.Experience.Pct-South Wasco County SD 1",
                                                           "Teacher.Experience.Pct-North Wasco County SD 21",
                                                           "Teacher.Experience.Pct-Dufur SD 29",
                                                           "Teacher.Experience.Pct-Hood River County SD" ,
                                                           "Teacher.Experience.Pct-Sherman County SD",
                                                           "Teacher.Experience.Pct-Jefferson County SD 509J",
                                                           
                                                           "Percent.Chronically.Absent-South Wasco County SD 1",
                                                           "Percent.Chronically.Absent-North Wasco County SD 21",
                                                           "Percent.Chronically.Absent-Dufur SD 29",
                                                           "Percent.Chronically.Absent-Hood River County SD" ,
                                                           "Percent.Chronically.Absent-Sherman County SD",
                                                           "Percent.Chronically.Absent-Jefferson County SD 509J",
                                                           
                                                           "Percent.Economically.Disadvantaged-South Wasco County SD 1",
                                                           "Percent.Economically.Disadvantaged-North Wasco County SD 21",
                                                           "Percent.Economically.Disadvantaged-Dufur SD 29",
                                                           "Percent.Economically.Disadvantaged-Hood River County SD" ,
                                                           "Percent.Economically.Disadvantaged-Sherman County SD",
                                                           "Percent.Economically.Disadvantaged-Jefferson County SD 509J")))
  
ggplot(ed.melt2, aes(y = indicator, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  #geom_text(aes(label = round(value,1)), color = "black") +
  coord_equal() + 
  #scale_fill_viridis(n=25, option = "D",breaks = c(-60, -20, -10, -5, seq(0,100,5))) +
  scale_fill_viridis(option = "D") + 
  # Horizontal lines to section off domains
  geom_hline(yintercept = 6.5, color = "white", lwd = 4) + 
  geom_hline(yintercept = 12.5, color = "white", lwd = 4) +
  geom_hline(yintercept = 18.5, color = "white", lwd = 4) +
  geom_hline(yintercept = 24.5, color = "white", lwd = 4) +
  geom_hline(yintercept = 30.5, color = "white", lwd = 4) +
  geom_hline(yintercept = 36.5, color = "white", lwd = 4) +
  # Title of variables
  annotate("text", x=2, y=6.5,label="Dropout", fontface=2, size = 2, color="black") +
  annotate("text", x=2, y=12.5,label="On Time Graduation", fontface=2, size = 2, color="black") +
  annotate("text", x=2, y=18.5,label="Chronic Absenteeism", fontface=2, size = 2, color="black") +
  annotate("text", x=2, y=24.5,label="Economically Disadvantaged", fontface=2, size = 2, color="black") +
  annotate("text", x=2, y=30.5,label="ELA Proficiency Change 8th-3rd Grade", fontface=2, size = 2, color="black") +
  annotate("text", x=2, y=36.5,label="Teacher Experience", fontface=2, size = 2, color="black") +
  
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  xlab("School Year")

Makes it alot easier to do direct comparisons of schools and years for one variable. but interpretability is still weak.

Standardized measures

Facet wrap but with scaled/standardized measures

ggplot(ed.melt.scaled, aes(y = variable, x = year, fill = value)) +
  geom_tile(color = "#ADB5BD") + #gray
  geom_text(aes(label = round(value,1)), color = "black") + 
  coord_equal() + facet_wrap(~District.Name) +
  #scale_fill_viridis(breaks = c(-60, -20, -10, -5, seq(0,100,5))) + 
  scale_fill_viridis() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  xlab("School Year")
## Warning: Removed 12 rows containing missing values (geom_text).